---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
      name:  <unnamed>
       log:  /Users/jesse/Dropbox/voter_id/Replication/table_a4.log
  log type:  text
 opened on:  17 Aug 2020, 14:38:26

. 
. use "$path/nc_dataset.dta" if registered_pre_2008 == 1, clear

. 
. drop black white hispanic othernw registered_pre_2008 noid_p_2016 dem rep

. 
. // drop 2018 
. drop if inlist(election, 11, 12)
(7,934,628 observations deleted)

. 
. sort id election

. 
. replace voted = 0 if voted == .
(0 real changes made)

. 
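. // drop voters born after 1998, then set voted to missing (.) for elections in which the voter was under 18 / ineligible; birth_year < 9999 leaves records with missing birth year untouched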
. drop if birth_year > 1998 & birth_year < 9999
(0 observations deleted)

. replace voted = . if (birth_year > 1990 & birth_year < 9999) & (election == 1 | election == 2) // 2008
(408 real changes made, 408 to missing)

. replace voted = . if (birth_year > 1992 & birth_year < 9999) & (election == 3 | election == 4) // 2010
(270 real changes made, 270 to missing)

. replace voted = . if (birth_year > 1994 & birth_year < 9999) & (election == 5 | election == 6) // 2012
(154 real changes made, 154 to missing)

. replace voted = . if (birth_year > 1996 & birth_year < 9999) & (election == 7 | election == 8) // 2014
(54 real changes made, 54 to missing)

. 
. ** create string encoding of possible pre-treatment outcome paths
. tostring voted, generate(outcome_path)
outcome_path generated as str1

. 
. by id: gen outcome_path_pretreat = outcome_path[1] + outcome_path[2] + ///
>                                                                 outcome_path[3] + outcome_path[4] + ///
>                                                                 outcome_path[5] + outcome_path[6] + ///
>                                                                 outcome_path[7] + outcome_path[8] 

. 
. gen count = 1

. 
. // keep general only
. keep if mod(election,2) == 0
(19,836,570 observations deleted)

. 
. replace voted = 0 if voted == .
(443 real changes made)

. 
. // construct age bins; voters too young to be eligible in every election get their own bins (11-14), assigned by birth cohort
. gen age_bin = 11 if (birth_year > 1990 & birth_year < 9999)
(19,835,550 missing values generated)

. replace age_bin = 12 if (birth_year > 1992 & birth_year < 9999)
(675 real changes made)

. replace age_bin = 13 if (birth_year > 1994 & birth_year < 9999)
(385 real changes made)

. replace age_bin = 14 if (birth_year > 1996 & birth_year < 9999)
(135 real changes made)

. // voters eligible over the whole period (born 1990 or earlier) are grouped into birth-year deciles (bins 1-10)
. xtile age_decile = birth_year if (birth_year <= 1990), nq(10)

. replace age_bin = age_decile if (birth_year <= 1990)
(19,789,225 real changes made)

. drop age_decile

. 
. compress
  variable count was float now byte
  variable age_bin was float now byte
  (119,019,420 bytes saved)

. 
. bysort age_bin: sum birth_year

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-> age_bin = 1

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
  birth_year |  1,984,440    1930.518    5.220344       1910       1937

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-> age_bin = 2

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
  birth_year |  2,067,420    1941.795     2.24528       1938       1945

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-> age_bin = 3

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
  birth_year |  1,890,745    1948.015    1.399603       1946       1950

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-> age_bin = 4

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
  birth_year |  2,076,855    1953.031    1.413353       1951       1955

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-> age_bin = 5

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
  birth_year |  2,223,165    1958.005    1.415639       1956       1960

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-> age_bin = 6

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
  birth_year |  1,797,585     1962.48    1.118805       1961       1964

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-> age_bin = 7

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
  birth_year |  2,018,870    1966.985    1.418931       1965       1969

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-> age_bin = 8

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
  birth_year |  1,864,995    1971.864    1.413556       1970       1974

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-> age_bin = 9

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
  birth_year |  2,070,180    1977.901    1.988504       1975       1981

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-> age_bin = 10

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
  birth_year |  1,794,970    1985.126    2.221404       1982       1990

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-> age_bin = 11

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
  birth_year |        345    1991.406    .4917588       1991       1992

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-> age_bin = 12

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
  birth_year |        290    1993.603     .490027       1993       1994

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-> age_bin = 13

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
  birth_year |        250      1995.6    .4908807       1995       1996

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-> age_bin = 14

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
  birth_year |        135    1997.444    .4987547       1997       1998

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-> age_bin = .

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
  birth_year |          0


. 
. sort id election

. 
. * vanilla diff in diff
. reghdfe voted treat, a(id election) cluster(id)
(converged in 3 iterations)

HDFE Linear regression                            Number of obs   = 19,836,570
Absorbing 2 HDFE groups                           F(   1,3967313) =    8056.22
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.6446
                                                  Adj R-squared   =     0.5557
                                                  Within R-sq.    =     0.0005
Number of clusters (id)      =  3,967,314         Root MSE        =     0.3161

                             (Std. Err. adjusted for 3,967,314 clusters in id)
------------------------------------------------------------------------------
             |               Robust
       voted |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
       treat |  -.0986095   .0010986   -89.76   0.000    -.1007628   -.0964562
------------------------------------------------------------------------------

Absorbed degrees of freedom:
---------------------------------------------------------------+
 Absorbed FE |  Num. Coefs.  =   Categories  -   Redundant     | 
-------------+-------------------------------------------------|
          id |            0         3967314        3967314 *   | 
    election |            4               5              1     | 
---------------------------------------------------------------+
* = fixed effect nested within cluster; treated as redundant for DoF computation

. local b1 = _b[treat]

. local se1 = _se[treat]

. local n1 = e(N)

. local nclust1 = e(N_clust)

. 
. * race by year
. reghdfe voted treat, a(id race_by_year) cluster(id)
(converged in 3 iterations)

HDFE Linear regression                            Number of obs   = 19,836,570
Absorbing 2 HDFE groups                           F(   1,3967313) =    7694.87
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.6450
                                                  Adj R-squared   =     0.5562
                                                  Within R-sq.    =     0.0005
Number of clusters (id)      =  3,967,314         Root MSE        =     0.3159

                             (Std. Err. adjusted for 3,967,314 clusters in id)
------------------------------------------------------------------------------
             |               Robust
       voted |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
       treat |  -.0966377   .0011017   -87.72   0.000    -.0987969   -.0944785
------------------------------------------------------------------------------

Absorbed degrees of freedom:
---------------------------------------------------------------+
 Absorbed FE |  Num. Coefs.  =   Categories  -   Redundant     | 
-------------+-------------------------------------------------|
          id |            0         3967314        3967314 *   | 
race_by_year |           24              25              1     | 
---------------------------------------------------------------+
* = fixed effect nested within cluster; treated as redundant for DoF computation

. local b2 = _b[treat]

. local se2 = _se[treat]

. local n2 = e(N)

. local nclust2 = e(N_clust)

. 
. * age by year 
. reghdfe voted treat, a(id age_by_year) cluster(id)
(converged in 3 iterations)

HDFE Linear regression                            Number of obs   = 19,790,245
Absorbing 2 HDFE groups                           F(   1,3958048) =     809.20
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.6526
                                                  Adj R-squared   =     0.5657
                                                  Within R-sq.    =     0.0000
Number of clusters (id)      =  3,958,049         Root MSE        =     0.3124

                             (Std. Err. adjusted for 3,958,049 clusters in id)
------------------------------------------------------------------------------
             |               Robust
       voted |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
       treat |  -.0322713   .0011345   -28.45   0.000    -.0344948   -.0300478
------------------------------------------------------------------------------

Absorbed degrees of freedom:
---------------------------------------------------------------+
 Absorbed FE |  Num. Coefs.  =   Categories  -   Redundant     | 
-------------+-------------------------------------------------|
          id |            0         3958049        3958049 *   | 
 age_by_year |          444             445              1     | 
---------------------------------------------------------------+
* = fixed effect nested within cluster; treated as redundant for DoF computation

. local b3 = _b[treat]

. local se3 = _se[treat]

. local n3 = e(N)

. local nclust3 = e(N_clust)

. 
. * age by race by year
. reghdfe voted treat, a(id race_by_age_by_year) cluster(id)
(dropped 55 singleton observations)
(converged in 3 iterations)

HDFE Linear regression                            Number of obs   = 19,790,190
Absorbing 2 HDFE groups                           F(   1,3958037) =     670.98
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.6532
                                                  Adj R-squared   =     0.5664
                                                  Within R-sq.    =     0.0000
Number of clusters (id)      =  3,958,038         Root MSE        =     0.3121

                             (Std. Err. adjusted for 3,958,038 clusters in id)
------------------------------------------------------------------------------
             |               Robust
       voted |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
       treat |  -.0295183   .0011396   -25.90   0.000    -.0317519   -.0272848
------------------------------------------------------------------------------

Absorbed degrees of freedom:
----------------------------------------------------------------------+
        Absorbed FE |  Num. Coefs.  =   Categories  -   Redundant     | 
--------------------+-------------------------------------------------|
                 id |            0         3958038        3958038 *   | 
race_by_age_by_year |         2074            2075              1     | 
----------------------------------------------------------------------+
* = fixed effect nested within cluster; treated as redundant for DoF computation

. local b4 = _b[treat]

. local se4 = _se[treat]

. local n4 = e(N)

. local nclust4 = e(N_clust)

. 
. preserve

. 
. 
. collapse (sum) count (mean) voted, by(outcome_path_pretreat no_dmv_match election)

. 
. *** get N by path by summing together n treated and n control
. by outcome_path: gen tot_tmp = count[6] if _n==6
(2,457 missing values generated)

. egen tot_treat = sum(tot_tmp), by(outcome_path)

. by outcome_path: replace tot_tmp = count[1] if _n == 1
(289 real changes made)

. egen tot = sum(tot_tmp), by(outcome_path)

. // N_voters is the total number of voters across all strata remaining in the collapsed data; it is computed before the weighted regression drops zero-weight strata
. egen N_voters = sum(tot_tmp)

. // get number of elections
. unique election
Number of unique values of election is  5
Number of records is  2710

. // N is number of voters * number of elections
. gen long N = N_voters * r(unique)

. drop tot_tmp

. gen tot_control = tot - tot_treat

. 
. *** get weights for fw based on total n per stratum
. *** a complete stratum has 10 obs (5 elections x treated/control); strata containing only one group have fewer
. *** fw requires integers so need to round
. gen tot2 = round(tot/10)

. gen tot_treat2 = round(tot_treat/10)

. egen op = group(outcome_path)

. 
. reghdfe voted no_dmv_match [fw=tot_treat2], a(op election)
weight tot_treat2 can only contain strictly positive integers, but 840 zero values were found (will be dropped)
(converged in 3 iterations)

HDFE Linear regression                            Number of obs   =    100,640
Absorbing 2 HDFE groups                           F(   1, 100448) =     193.13
                                                  Prob > F        =     0.0000
                                                  R-squared       =     0.7657
                                                  Adj R-squared   =     0.7652
                                                  Within R-sq.    =     0.0019
                                                  Root MSE        =     0.2315

------------------------------------------------------------------------------
       voted |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
no_dmv_match |  -.0202847   .0014596   -13.90   0.000    -.0231455   -.0174238
-------------+----------------------------------------------------------------
    Absorbed |    F(190, 100448) =   1726.318   0.000             (Joint test)
------------------------------------------------------------------------------

Absorbed degrees of freedom:
---------------------------------------------------------------+
 Absorbed FE |  Num. Coefs.  =   Categories  -   Redundant     | 
-------------+-------------------------------------------------|
          op |          187             187              0     | 
    election |            4               5              1     | 
---------------------------------------------------------------+

. local b5 = _b[no_dmv_match]

. local se5 = _se[no_dmv_match]

. local n5 = N

. local nclust5 = N_voters

. 
. * match on pre-treatment turnout path and race
. 
. restore

. preserve

. 
. 
. collapse (sum) count (mean) voted, by(outcome_path_pretreat no_dmv_match election race_string)

. 
. *** get N by path by summing together n treated and n control
. sort outcome_path race_string no_dmv_match

. by outcome_path race_string: gen tot_tmp = count[6] if _n==6
(8,989 missing values generated)

. egen tot_treat = sum(tot_tmp), by(outcome_path race_string)

. by outcome_path race_string: replace tot_tmp = count[1] if _n == 1
(1,197 real changes made)

. egen tot = sum(tot_tmp), by(outcome_path race_string)

. // N_voters is the total number of voters across all strata remaining in the collapsed data; it is computed before the weighted regression drops zero-weight strata
. egen N_voters = sum(tot_tmp)

. // get number of elections
. unique election
Number of unique values of election is  5
Number of records is  9740

. // N is number of voters * number of elections
. gen long N = N_voters * r(unique)

. drop tot_tmp

. gen tot_control = tot - tot_treat

. 
. *** get weights for fw based on total n per stratum
. *** a complete stratum has 10 obs (5 elections x treated/control); strata containing only one group have fewer
. *** fw requires integers so need to round
. gen tot2 = round(tot/10)

. gen tot_treat2 = round(tot_treat/10)

. egen op = group(outcome_path race_string)

. 
. reghdfe voted no_dmv_match [fw=tot_treat2], a(op election)
weight tot_treat2 can only contain strictly positive integers, but 5790 zero values were found (will be dropped)
(converged in 3 iterations)

HDFE Linear regression                            Number of obs   =    100,380
Absorbing 2 HDFE groups                           F(   1,  99980) =     178.45
                                                  Prob > F        =     0.0000
                                                  R-squared       =     0.7678
                                                  Adj R-squared   =     0.7668
                                                  Within R-sq.    =     0.0018
                                                  Root MSE        =     0.2309

------------------------------------------------------------------------------
       voted |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
no_dmv_match |  -.0194727   .0014577   -13.36   0.000    -.0223298   -.0166156
-------------+----------------------------------------------------------------
    Absorbed |     F(398, 99980) =    830.000   0.000             (Joint test)
------------------------------------------------------------------------------

Absorbed degrees of freedom:
---------------------------------------------------------------+
 Absorbed FE |  Num. Coefs.  =   Categories  -   Redundant     | 
-------------+-------------------------------------------------|
          op |          395             395              0     | 
    election |            4               5              1     | 
---------------------------------------------------------------+

. local b6 = _b[no_dmv_match]

. local se6 = _se[no_dmv_match]

. local n6 = N

. local nclust6 = N_voters

. 
. restore

. 
. collapse (sum) count (mean) voted, by(outcome_path_pretreat no_dmv_match election race_string age_bin)

. 
. // drop if age is missing
. drop if age_bin == .
(1,645 observations deleted)

. 
. *** get N by path by summing together n treated and n control
. sort outcome_path race_string age_bin no_dmv_match election

. by outcome_path race_string age_bin: gen tot_tmp = count[6] if _n==6
(57,922 missing values generated)

. egen tot_treat = sum(tot_tmp), by(outcome_path race_string age_bin)

. by outcome_path race_string age_bin: replace tot_tmp = count[1] if _n == 1
(8,842 real changes made)

. egen tot = sum(tot_tmp), by(outcome_path race_string age_bin)

. // N_voters is the total number of voters across all strata remaining in the collapsed data; it is computed before the weighted regression drops zero-weight strata
. egen N_voters = sum(tot_tmp)

. // get number of elections
. unique election
Number of unique values of election is  5
Number of records is  61350

. // N is number of voters * number of elections
. gen long N = N_voters * r(unique)

. drop tot_tmp

. gen tot_control = tot - tot_treat

. 
. *** get weights for fw based on total n per stratum
. *** a complete stratum has 10 obs (5 elections x treated/control); strata containing only one group have fewer
. *** fw requires integers so need to round
. gen tot2 = round(tot/10)

. gen tot_treat2 = round(tot_treat/10)

. egen op = group(outcome_path race_string age_bin)

. 
. reghdfe voted no_dmv_match [fw=tot_treat2], a(op election)
weight tot_treat2 can only contain strictly positive integers, but 49340 zero values were found (will be dropped)
(converged in 3 iterations)

HDFE Linear regression                            Number of obs   =     91,620
Absorbing 2 HDFE groups                           F(   1,  90414) =      40.94
                                                  Prob > F        =     0.0000
                                                  R-squared       =     0.7719
                                                  Adj R-squared   =     0.7689
                                                  Within R-sq.    =     0.0005
                                                  Root MSE        =     0.2303

------------------------------------------------------------------------------
       voted |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
no_dmv_match |  -.0097362   .0015217    -6.40   0.000    -.0127187   -.0067536
-------------+----------------------------------------------------------------
    Absorbed |    F(1204, 90414) =    254.145   0.000             (Joint test)
------------------------------------------------------------------------------

Absorbed degrees of freedom:
---------------------------------------------------------------+
 Absorbed FE |  Num. Coefs.  =   Categories  -   Redundant     | 
-------------+-------------------------------------------------|
          op |         1201            1201              0     | 
    election |            4               5              1     | 
---------------------------------------------------------------+

. local b7 = _b[no_dmv_match]

. local se7 = _se[no_dmv_match]

. local n7 = N 

. local nclust7 = N_voters

. 
. log close
      name:  <unnamed>
       log:  /Users/jesse/Dropbox/voter_id/Replication/table_a4.log
  log type:  text
 closed on:  17 Aug 2020, 14:52:07
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
